Elements Of Data Visualization: Project 3, Satellite Analysis


By Jose Cortez, Adam Hernandez, and Alex Pearce

GitHub: https://github.com/alexpearce92/DV_RProject3

This project deals with creating interesting graphs concerning climatic weather in the US in 2010, using data gathered and compiled by Enigma. Included are three datasets that were joined to form a single large data frame: storm metadata, storm path information, and storm impact.

Load necessary packages

source("../01 Data/load_packages.R",echo=TRUE)
## 
## > library("ggplot2")
## 
## > library("gplots")
## 
## > library("grid")
## 
## > library("plyr")
## 
## > library("RCurl")
## 
## > library("reshape2")
## 
## > library("dplyr")
## 
## > library("jsonlite")
## 
## > library("extrafont")

Dataframe Creations

source("../01 Data/create_df.R",echo=TRUE)
## 
## > StormMeta.df <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"select * from storm_meta\""), 
## +     httpheader = c(D .... [TRUNCATED] 
## 
## > StormPath.df <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"select * from storm_path\""), 
## +     httpheader = c(D .... [TRUNCATED] 
## 
## > StormImpact.df <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"select * from storm_impact\""), 
## +     httpheader = .... [TRUNCATED] 
## 
## > tbl_df(StormPath.df)
## Source: local data frame [610,351 x 12]
## 
##    EVENT_ID WEATHER_FORECAST_ORGANIZATION MAGNITUDE MAGNITUDE_TYPE
## 1   5130995                           BUF      null           null
## 2   5130670                           BGM        50              E
## 3   5131457                           TFX        50              M
## 4   5127579                           PIH      null           null
## 5   5127907                           AJK      null           null
## 6   5127580                           PIH      null           null
## 7   5130927                           BUF        44              M
## 8   5130996                           BUF        43              M
## 9   5130997                           BUF      null           null
## 10  5130998                           BUF      null           null
## ..      ...                           ...       ...            ...
## Variables not shown: BEGIN_RANGE (fctr), BEGIN_LOCATION (fctr), END_RANGE
##   (fctr), END_LOCATION (fctr), BEGIN_LAT (fctr), BEGIN_LON (fctr), END_LAT
##   (fctr), END_LON (fctr)
## 
## > tbl_df(StormImpact.df)
## Source: local data frame [610,351 x 7]
## 
##    EVENT_ID INJURIES_DIRECT INJURIES_INDIRECT DEATHS_DIRECT
## 1   5129401               0                 0             0
## 2   5129693               0                 0             0
## 3   5127644               0                 0             0
## 4   5131077               0                 0             0
## 5   5131804               0                 0             0
## 6   5129402               0                 0             0
## 7   5127031               0                 0             0
## 8   5131395               0                 0             0
## 9   5127645               0                 0             0
## 10  5127643               0                 0             0
## ..      ...             ...               ...           ...
## Variables not shown: DEATHS_INDIRECT (int), DAMAGE_PROPERTY (dbl),
##   DAMAGE_CROPS (int)
## 
## > tbl_df(StormMeta.df)
## Source: local data frame [610,351 x 6]
## 
##    EVENT_ID    CZ_NAME       STATE YEAR MONTH_NAME        EVENT_TYPE
## 1   5128715     GREENE    ARKANSAS 2000  January   Thunderstorm Wind
## 2   5126948 MIAMI-DADE     FLORIDA 2000  January         Rip Current
## 3   5131034  SUNFLOWER MISSISSIPPI 2000  January   Thunderstorm Wind
## 4   5131737    MADISON   LOUISIANA 2000  January   Thunderstorm Wind
## 5   5128716  CRAIGHEAD    ARKANSAS 2000  January           Lightning
## 6   5131033     WARREN MISSISSIPPI 2000  January   Thunderstorm Wind
## 7   5128711  CRAIGHEAD    ARKANSAS 2000  January   Thunderstorm Wind
## 8   5130439   CRAWFORD     INDIANA 2000  January                Hail
## 9   5131948   STODDARD    MISSOURI 2000  January   Thunderstorm Wind
## 10  5130440   CRAWFORD     INDIANA 2000  January   Thunderstorm Wind
## ..      ...        ...         ...  ...        ...               ...

Plotting the destruction of each type of storm

Here we will join two datasets so that we may determine the destructive damage of each type of storm over time.

Create the data set using a left outer join

source("../02 Data Wrangling/impact_per_year.R", echo=TRUE)
## 
## > impact_per_year.df <- dplyr::left_join(StormMeta.df, 
## +     StormImpact.df, by = "EVENT_ID") %>% mutate(INJURIES = INJURIES_DIRECT + 
## +     INJURIES .... [TRUNCATED] 
## 
## > tbl_df(impact_per_year.df)
## Source: local data frame [324 x 5]
## 
##    YEAR              EVENT_TYPE sumDeaths sumInjuries sumDamage
## 1  2000               Avalanche        16          17    0.7550
## 2  2000                Blizzard         1           0    4.7820
## 3  2000         Cold/Wind Chill         8           0    0.0000
## 4  2000               Dense Fog        10         118    1.8160
## 5  2000              Dust Storm         1          29    0.1900
## 6  2000          Excessive Heat         0           2    0.0000
## 7  2000 Extreme Cold/Wind Chill        18           0    3.7150
## 8  2000             Flash Flood        30          36  858.3329
## 9  2000                   Flood         9          11 1076.0630
## 10 2000                    Hail         2          57  570.8990
## ..  ...                     ...       ...         ...       ...

Plot each storm’s deaths and injuries

Below we see plots of each storm's deaths and injuries for each year. According to the dataset, the deadliest storm was a heat wave in 2006. Injuries, however, show large spikes for excessive heat (2007), heat (2006), hurricane/typhoon (2008), and tornado (2008) storms. Although Hurricane Katrina has the highest casualty count of any storm in US history, this data is missing from the 2005 data set provided by Enigma.

source("../03 Visualizations/deaths_per_year_plot.R", echo=TRUE)
## 
## > ggplot(impact_per_year.df, aes(x = YEAR), na.rm = "TRUE") + 
## +     geom_line(aes(y = sumDeaths, color = "Deaths")) + geom_line(aes(y = sumInjuries,  .... [TRUNCATED]

Here we compare the damage to property and crops caused by each type of storm. Note that damage is measured in millions of US dollars. According to the plots below, it appears that the most devastating storms for property and crops were floods (2006), followed by hurricanes and storm surges (2005). As noted above, much of the data from Hurricane Katrina is missing from this data set, which is why the 2005 hurricane event does not dwarf the 2006 flood.

source("../03 Visualizations/damage_per_year_plot.R", echo=TRUE)
## 
## > ggplot(impact_per_year.df, aes(x = YEAR), na.rm = "TRUE") + 
## +     geom_line(aes(y = sumDamage, color = "Damage"), show_guide = FALSE) + 
## +     face .... [TRUNCATED]


Adam’s stuff


Cortez’s stuff


Categorical Statistics

We will now generate a characterization of the categorical columns represented in the tables.

Generate lists of categorical and numerical columns

source("../01 Data/create_categoricals.R",echo=TRUE)
## 
## > CategoricalsStormPath <- eval(parse(text = substring(getURL(URLencode("http://129.152.144.84:5001/rest/native/?query=\"select * from storm_path\""), .... [TRUNCATED] 
## 
## > CategoricalsStormMeta <- eval(parse(text = substring(getURL(URLencode("http://129.152.144.84:5001/rest/native/?query=\"select * from storm_meta\""), .... [TRUNCATED] 
## 
## > CategoricalsStormImpact <- eval(parse(text = substring(getURL(URLencode("http://129.152.144.84:5001/rest/native/?query=\"select * from storm_impact\ .... [TRUNCATED]

Create characterization and generate png file for each table

Notice that StormImpact does not generate any categorical plots because it contains only numerical columns.

source("../03 Visualizations/storm_impact_categoricals.R",echo=TRUE)
## 
## > myplot <- function(df, x) {
## +     names(df) <- c("x")
## +     ggplot(df, aes(x = x), na.rm = TRUE) + geom_histogram() + 
## +         scale_x_discrete(x) .... [TRUNCATED] 
## 
## > StormImpactPlotList <- list()
## 
## > for (i in names(StormImpact.df)) {
## +     if (i %in% CategoricalsStormImpact[[1]]) {
## +         r <- data.frame(fromJSON(getURL(URLencode("129.152.144 ..." ... [TRUNCATED] 
## 
## > if (length(StormImpactPlotList) > 0) {
## +     png("../00 Doc/StormImpactCategoricals.png", width = 35, 
## +         height = 50, units = "in", res = 72 .... [TRUNCATED]
source("../03 Visualizations/storm_meta_categoricals.R",echo=TRUE)
## 
## > myplot <- function(df, x) {
## +     names(df) <- c("x")
## +     ggplot(df, aes(x = x), na.rm = TRUE) + geom_histogram() + 
## +         scale_x_discrete(x) .... [TRUNCATED] 
## 
## > StormMetaPlotList <- list()
## 
## > for (i in names(StormMeta.df)) {
## +     if (i %in% CategoricalsStormMeta[[1]]) {
## +         r <- data.frame(fromJSON(getURL(URLencode("129.152.144.84: ..." ... [TRUNCATED]
## Warning: position_stack requires constant width: output may be incorrect

## 
## > if (length(StormMetaPlotList) > 0) {
## +     png("../00 Doc/StormMetaCategoricals.png", width = 35, height = 50, 
## +         units = "in", res = 72)
## +  .... [TRUNCATED]
## Warning: position_stack requires constant width: output may be incorrect

## pdf 
##   2
source("../03 Visualizations/storm_path_categoricals.R",echo=TRUE)
## 
## > myplot <- function(df, x) {
## +     names(df) <- c("x")
## +     ggplot(df, aes(x = x), na.rm = TRUE) + geom_histogram() + 
## +         scale_x_discrete(x) .... [TRUNCATED] 
## 
## > StormPathPlotList <- list()
## 
## > for (i in names(StormPath.df)) {
## +     if (i %in% CategoricalsStormPath[[1]]) {
## +         r <- data.frame(fromJSON(getURL(URLencode("129.152.144.84: ..." ... [TRUNCATED]

## Warning: position_stack requires constant width: output may be incorrect

## Warning: position_stack requires constant width: output may be incorrect
## 
## > if (length(StormPathPlotList) > 0) {
## +     png("../00 Doc/StormPathCategoricals.png", width = 35, height = 50, 
## +         units = "in", res = 72)
## +  .... [TRUNCATED]
## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect

## pdf 
##   2

And here are the png files that we created

StormPath Categoricals

StormMeta Categoricals